Forecast for 2021 election

Code
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom        1.0.1     ✔ recipes      1.0.5
✔ dials        1.1.0     ✔ rsample      1.1.1
✔ dplyr        1.1.2     ✔ tibble       3.2.1
✔ ggplot2      3.4.0     ✔ tidyr        1.3.0
✔ infer        1.0.4     ✔ tune         1.0.1
✔ modeldata    1.1.0     ✔ workflows    1.1.3
✔ parsnip      1.0.4     ✔ workflowsets 1.0.0
✔ purrr        1.0.1     ✔ yardstick    1.1.0
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ purrr::discard() masks scales::discard()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
✖ recipes::step()  masks stats::step()
• Use suppressPackageStartupMessages() to eliminate package startup messages
Code
library(tidyverse)
── Attaching packages
───────────────────────────────────────
tidyverse 1.3.2 ──
✔ readr   2.1.4     ✔ forcats 0.5.2
✔ stringr 1.5.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ readr::col_factor() masks scales::col_factor()
✖ purrr::discard()    masks scales::discard()
✖ dplyr::filter()     masks stats::filter()
✖ stringr::fixed()    masks recipes::fixed()
✖ dplyr::lag()        masks stats::lag()
✖ readr::spec()       masks yardstick::spec()
Code
library(fs)
library(here)
here() starts at C:/Users/carlo/OneDrive/Documents/GitHub/MSc_Project
Code
library(glmnet)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loaded glmnet 4.1-6
Code
library(glue)

load models

Code
classification_model <- readRDS(here("6. Modelling","clustering_models","clustering_ranger_redux.rds"))

regression_parameters   <-  list()

for(i in 1:3){
  regression_parameters[[i]] <-readRDS(here("6. Modelling","elastic_net_eval",
                                 glue("cluster_delta_cluster_cluster{i-1}.rds"))) |>
      filter(RMSE_Overall==min(RMSE_Overall))
}
names(regression_parameters) <- 0:2

Data to train regression model(s)

Code
dataset <- read_csv(here("4. Data","consolidated_cluster.csv"))      |>
           filter(election_year!=2022)                       |>
           select(-any_of(c("Metro_Area")))              |>
            mutate(across(where(is.numeric), ~ replace_na(.x,0)))
Rows: 601 Columns: 65
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): DivisionNm, StateAb, Metro_Area, Metro
dbl (61): election_year, Year, Australian_Citizens, Age_Baby_Boomers, Age_Ge...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
clusters <- read_csv(here("4. Data","clusters.csv"))         |>
            select(-any_of(c("Metro_Area")))
Rows: 450 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): DivisionNm, Metro_Area
dbl (2): Year, cluster

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
dataset <- dataset |>
           left_join(clusters,by=c("DivisionNm"="DivisionNm","election_year"="Year"))

rm(clusters)

party_cols <- c("GRN","ALP","COAL","Other")

vote <- dataset |> select(DivisionNm,Year,election_year,StateAb,Metro,cluster,all_of(party_cols))

cluster_avg <- read_csv(here("4. Data","cluster_values.csv")) |>
             filter(Year!=2021) |>
             pivot_longer(-c(Year,cluster),
                          names_to = "Attribute",values_to="National") |>
             mutate(Attribute=str_replace_all(Attribute," - ","_"),
                    Attribute=str_replace_all(Attribute,"-","_"),
                    Attribute=str_squish(Attribute),
                    Attribute=str_replace_all(Attribute," ","_")) 
Rows: 9 Columns: 64
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (64): Year, cluster, Australian_Citizens, Age - Baby Boomers, Age - Gen ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
rest <- dataset |> 
        select(-all_of(party_cols)) |>
        select(-Metro,-StateAb,-election_year) |>
         pivot_longer(-c(DivisionNm,Year,cluster),
               names_to = "Attribute",values_to = "CED") |>
         mutate(Year=as.numeric(Year))                   |>
         left_join(cluster_avg,
            by=c("Year","Attribute","cluster")) |>
          select(-cluster) |>
          mutate(Value=CED-National,.keep="unused") |>
          pivot_wider(names_from = Attribute, values_from = Value)


dataset <- vote |>
           left_join(rest,by=c("DivisionNm","Year")) |>
           mutate(Division=str_c(DivisionNm,"-",election_year),.keep="unused") |>
          select(-any_of(c("Year","Household_Semi_detached"))) |>
          select(-any_of(c("StateAb","Metro")))


rm(rest,vote)
Code
regression_models <- list()
id_col <- "Division"

for(i in c(1:3)){
  cluster_nbr <- i-1

  predictors <- regression_parameters[[i]]$coefs[[1]] |>
                filter(str_detect(covariate,"Intercept",TRUE)) |>
                pull(covariate)
  
  x.train <- dataset |>  column_to_rownames(id_col) |>  select(all_of(predictors)) 
  x.train <- model.matrix( ~ .+1, data = x.train)
  
  y.train <-  dataset |>  column_to_rownames(id_col) |>  select(all_of(party_cols)) |> as.matrix()

  regression_models[[i]] <- glmnet(x.train,y.train,
                        family = "mgaussian",
                        lambda = regression_parameters[[i]][1,]$lambda ,
                        alpha =  regression_parameters[[i]][1,]$alpha)
  
}
names(regression_models) <- 0:2

Forecasting 2021

Code
new_data <-  read_csv(here("4. Data","consolidated.csv"))      |>
           select(-any_of(party_cols))    |>
           filter(election_year==2022)                       |>
           select(-any_of(c("Year")))     |>
           mutate(Division = str_c(DivisionNm,"-",election_year),
                  .keep="unused",.before=1)                  |>
           mutate(Metro=case_when(
             Metro=="Yes" ~ 1,
             Metro=="No"  ~ 0
            ),.keep="unused") |>
           mutate(across(where(is.numeric), ~ replace_na(.x,0)))
Rows: 601 Columns: 65
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): DivisionNm, StateAb, Metro_Area, Metro
dbl (61): election_year, ALP, COAL, GRN, Other, Year, Australian_Citizens, A...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
clusters   <- new_data |> 
                    column_to_rownames("Division") |>
                   select(any_of(c("Language_English_Only","Metro")),
                   any_of(c("Household_Flat",
                           "Household_Standalone",
                           "Household_Owned_with_a_mortgage",
                           "Relationship_Non_dependent_Child",
                           "Relationship_Group_Household",
                           "Relationship_Child_under_15")))      

clusters<-        clusters|>
                  add_column(cluster= predict(classification_model,clusters)$.pred_class) |>
                  rownames_to_column("Division") |>
                  select(Division,cluster)
Code
library(leaflet)
library(sf)
Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
Code
map_data <- st_read(here("4. Data","CED_2021.gpkg")) |>
  left_join(clusters |>
            mutate(DivisionNm=str_remove(Division,"-2022")) |>
            mutate(DivisionNm=str_remove(DivisionNm," \\(II\\)")),
            by="DivisionNm") 
Reading layer `CED_2021' from data source 
  `C:\Users\carlo\OneDrive\Documents\GitHub\MSc_Project\4. Data\CED_2021.gpkg' 
  using driver `GPKG'
Simple feature collection with 151 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 96.81704 ymin: -43.65855 xmax: 167.9969 ymax: -9.219923
Geodetic CRS:  WGS 84
Code
clusters_colours <- ochRe::ochre_palettes[["lorikeet"]][1:3]
pal <- colorFactor(clusters_colours, 0:2)



map_data    |>
  leaflet() |>
  addPolygons(stroke = FALSE, smoothFactor = 0.3, fillOpacity = 1,     fillColor = ~pal(cluster),     label = ~glue::glue("{DivisionNm}: {cluster}")) |> 
  addLegend(pal = pal, values = ~cluster, opacity = 1.0)
Code
new_data <- new_data |>
            left_join(clusters,by="Division")
Code
prediction <- tibble()

for(i in c(1:3)){
  
   cluster_nbr <- i-1
   
   predictors <- regression_parameters[[i]]$coefs[[1]] |>
                 filter(str_detect(covariate,"Intercept",TRUE)) |>
                 pull(covariate)
   
   x.new <- new_data |> 
              filter(cluster==cluster_nbr) |>
              column_to_rownames("Division") |>
              select(all_of(predictors)) 
   
   
   x.new <- model.matrix( ~ .+1, data = x.new) 
  
   pred_i <- predict(regression_models[[i]], x.new) |>
             as_tibble(rownames="Division")         |>
             rename_with(~str_remove(.x,"\\.s0"))   |>
             mutate(cluster=cluster_nbr,.after=1)


   prediction <- bind_rows(prediction,pred_i)
     
}
Code
historic_cluster <- read_csv(here("4. Data","consolidated_cluster.csv"))      |>
                    filter(election_year!=2022)                       |>
                    select(any_of(c("GRN","COAL","ALP","Other")))              |>
                     mutate(across(where(is.numeric), ~ replace_na(.x,0)))
Rows: 601 Columns: 65
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): DivisionNm, StateAb, Metro_Area, Metro
dbl (61): election_year, Year, Australian_Citizens, Age_Baby_Boomers, Age_Ge...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
#news poll
primary_vote <- tribble(~PartyAb,~Avg.Vote,
                        "COAL", 35, 
                        "ALP",  36,
                        "GRN",  12,
                        "Other",17)




actual_vote <- read_csv(here("4. Data","primary_vote.csv")) |>
               filter(Year==2022)                           |>
               group_by(PartyAb,DivisionNm)                 |>
               summarise(Actual=sum(Percentage),.groups="drop")
Rows: 2880 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): StateAb, DivisionNm, PartyAb
dbl (3): Year, OrdinaryVotes, Percentage

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
prediction_compared <- prediction |>
  pivot_longer(-c(Division,cluster),
               names_to="PartyAb",
               values_to = "VoteDiff") |>
  mutate(DivisionNm=str_remove(Division,"-2022")) |>
  left_join(primary_vote,by="PartyAb") |>
  mutate(Predicted=Avg.Vote+VoteDiff) |>
  left_join(actual_vote,by=c("DivisionNm","PartyAb")) |>
  select(DivisionNm,PartyAb,cluster,Predicted,Actual) |>
  mutate(Error=Actual-Predicted)
Code
library(echarts4r)

prediction_compared$ref <- rnorm(nrow(prediction_compared))
auspol::party_colours()[party_cols]
      GRN       ALP      COAL     Other 
"#009C3D" "#E13940" "#1C4F9C" "#414141" 
Code
prediction_compared |>
  mutate(Div=glue::glue("{DivisionNm} ({cluster}): {PartyAb}")) |>
  group_by(PartyAb,cluster) |>
  e_charts(Error)       |>
   e_theme_custom('{"color":["#E13940","#1C4F9C","#009C3D","#414141"]}')|>
  e_scatter(ref,symbol_size = 10,bind=Div)   |>
  e_x_axis(min=-30,max=30) |>
  e_rm_axis(axis="y") |>
  e_facet(rows=4,cols=3)    |>
  e_tooltip(formatter = htmlwidgets::JS("
      function(params){
        return('<strong>' + params.name + 
                '</strong><br />error: ' + params.value[0]) 
                }
    ")) 
Code
prediction_compared |>
  mutate(Error=Error^2) |>
  group_by(PartyAb) |>
  summarise(RMSE=sqrt(mean(Error,na.rm=TRUE)),.groups="drop")

Samples

Code
library(auspol)

house_primary_historic_plot("Grayndler", parties =4, 
                            parties_year = 2022, 
                            include_others = TRUE )

Code
house_primary_historic_plot("Kooyong", parties =4, 
                            parties_year = 2022, 
                            include_others = TRUE )

Code
house_primary_historic_plot("Griffith", parties =4, 
                            parties_year = 2022, 
                            include_others = TRUE )